
import requests
import parsel
import json
from redis import StrictRedis
import datetime
def main():
    """Scrape the Baidu hot-search ranking page and store the top 10
    entries (name, popularity score, link) as JSON strings in Redis.

    Side effects: one HTTP GET, prints each record (via save_redis),
    and rewrites the 'baidu_hot' Redis list on localhost:6379.

    Raises:
        requests.HTTPError: if the ranking page returns an error status.
        requests.Timeout: if the site does not respond within 10s.
    """
    # Baidu hot-search ranking page (category 12).
    url = 'http://top.baidu.com/category?c=12'
    # Browser-like User-Agent so the site does not reject the scraper.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'}
    # Explicit timeout: requests has no default and would block forever on
    # a stalled connection; raise_for_status surfaces HTTP errors instead
    # of silently parsing an error page.
    r = requests.get(url, headers=headers, timeout=10)
    r.raise_for_status()
    body = r.content.decode('utf-8')
    sel = parsel.Selector(body)
    names = sel.css('.c-single-text-ellipsis::text').getall()
    scores = sel.css('.hot-score_1f2_w::text').getall()
    links = sel.css('.item-wrap_2oCLZ::attr(href)').getall()
    # The original code indexed names AND scores with step 2 (entries
    # apparently duplicated in the markup) while links are consecutive.
    # Behavior is kept; TODO confirm the score list really parallels the
    # duplicated name list on the live page.
    data_list = []
    for rank, i in enumerate(range(0, 20, 2)):
        data_list.append({
            'name': names[i],
            # '万' means x10000. NOTE(review): this textual replacement
            # breaks on decimal scores such as '1.5万' ('1.50000');
            # kept as-is pending confirmation of the site's format.
            'number': scores[i].replace('万', '0000'),
            'link': links[rank],
        })
    redis = StrictRedis(host='localhost', port=6379, decode_responses=True)
    save_redis(redis, data_list)

def save_redis(redis, data_list):
    """Replace the 'baidu_hot' Redis list with JSON-serialized entries.

    Each dict in *data_list* is dumped as non-ASCII-escaped JSON,
    echoed to stdout, and appended to the list in original order.
    """
    key = 'baidu_hot'
    redis.delete(key)
    for entry in data_list:
        serialized = json.dumps(entry, ensure_ascii=False)
        print(serialized)
        redis.rpush(key, serialized)
# Run the scraper only when executed directly, not when imported.
if __name__ == '__main__':
    main()